Load data.
# Read the anonymized pilot data; readr infers column types (echoed in the
# parsing spec below).
df <- read_csv("pilot-anonymous.csv")
## Parsed with column specification:
## cols(
## .default = col_double(),
## workerId = col_character(),
## condition = col_character(),
## interact = col_logical(),
## gender = col_character(),
## age = col_character(),
## education = col_character(),
## chart_use = col_character(),
## problems = col_character(),
## interactions = col_character(),
## trial = col_character()
## )
## See spec(...) for full column specifications.
# Inspect the first rows to sanity-check parsing and column contents.
head(df)
## # A tibble: 6 x 34
## workerId batch bonus condition duration interact n_data_conds n_trials gender
## <chr> <dbl> <dbl> <chr> <dbl> <lgl> <dbl> <dbl> <chr>
## 1 fccb21d5 1 0.25 icons 1269. FALSE 16 16 man
## 2 fccb21d5 1 0.25 icons 1269. FALSE 16 16 man
## 3 fccb21d5 1 0.25 icons 1269. FALSE 16 16 man
## 4 fccb21d5 1 0.25 icons 1269. FALSE 16 16 man
## 5 fccb21d5 1 0.25 icons 1269. FALSE 16 16 man
## 6 fccb21d5 1 0.25 icons 1269. FALSE 16 16 man
## # … with 25 more variables: age <chr>, education <chr>, chart_use <chr>,
## # problems <chr>, A <dbl>, B <dbl>, C <dbl>, D <dbl>, abs_err <dbl>,
## # causal_support <dbl>, ground_truth <dbl>, interactions <chr>, n <dbl>,
## # nA <dbl>, nB <dbl>, nC <dbl>, nD <dbl>, p_immun <dbl>, payoff <dbl>,
## # q_idx <dbl>, response_A <dbl>, response_B <dbl>, trial <chr>,
## # trial_dur <dbl>, trial_idx <dbl>
Calculate a log response ratio (lrr) to model as a function of causal_support. Also, convert predictor variables to factors for modeling where needed.
# Build the modeling data frame:
# - `unite` combines condition and interact into a single visualization
#   factor `vis` (e.g. "icons_FALSE")
# - responses are clamped to [0.5, 99.5] so the log response ratio below is
#   always finite (log(0) and log-ratio against 100 are avoided)
# - `lrr` is the log response ratio in logit-like units
# - predictors used as categorical terms are cast to factors
# - `delta_p` is the difference in proportions between the {C,D} and {A,B}
#   cells of the contingency table
model_df <- df %>%
  unite("vis", condition, interact, sep = "_") %>%
  mutate(
    # response units: clamp to [0.5, 99.5]; pmin/pmax is the idiomatic
    # vectorized clamp and propagates NA exactly like the nested if_else
    # calls it replaces
    response_A = pmax(pmin(as.numeric(response_A), 99.5), 0.5),
    response_B = pmax(pmin(as.numeric(response_B), 99.5), 0.5),
    lrr = log(response_A / 100) - log(response_B / 100),
    # predictors as factors
    worker = as.factor(workerId),
    vis = as.factor(vis),
    n = as.factor(n),
    # derived predictors
    delta_p = (C + D)/(nC + nD) - (A + B)/(nA + nB)
  )
Prior predictive check
# Prior predictive check for the simplest model (lrr ~ causal_support).
# sample_prior = "only" draws from the priors alone, ignoring the likelihood,
# so p1's predictions reflect prior assumptions, not the data.
p1 <- brm(data = model_df, family = "gaussian",
lrr ~ causal_support,
prior = c(prior(normal(-0.21, 1), class = Intercept), # center at qlogis(mean(model_df$response_A) / 100)
prior(normal(1, 2), class = b, coef = causal_support), # center at unbiased slope
prior(normal(0, 1), class = sigma)), # weakly informative half-normal
sample_prior = "only",
iter = 3000, warmup = 500, chains = 2, cores = 2)
## Compiling the C++ model
## Start sampling
# Visualize the prior predictive distribution: draw predicted lrr values from
# p1, back-transform to the response scale (plogis * 100), and overlay the
# observed responses as points for comparison.
model_df %>%
select(causal_support, response_A) %>%
add_predicted_draws(p1, prediction = "lrr_rep", seed = 1234, n = 500) %>%
mutate(
# transform to probability units
response_A_rep = plogis(lrr_rep) * 100
) %>%
ggplot(aes(x = causal_support, y = response_A_rep)) +
stat_lineribbon(.width = c(0.95, 0.8, 0.5)) +
geom_point(data = model_df, aes(y = response_A), alpha = 0.5) +
scale_fill_brewer() +
labs(subtitle = "Prior predictive distribution") +
theme(panel.grid = element_blank())
Fit model
# Fit the simple model to the data (same priors as the prior predictive
# check p1). `file =` caches the fitted model on disk so re-knitting reloads
# it instead of re-sampling.
m1 <- brm(data = model_df, family = "gaussian",
lrr ~ causal_support,
prior = c(prior(normal(-0.21, 1), class = Intercept), # center at qlogis(mean(model_df$response_A) / 100)
prior(normal(1, 2), class = b, coef = causal_support), # center at unbiased slope
prior(normal(0, 1), class = sigma)), # weakly informative half-normal
iter = 3000, warmup = 500, chains = 2, cores = 2,
file = "model-fits/1_simple")
Diagnostics
# Check convergence (Rhat ~ 1) and effective sample sizes for m1.
summary(m1)
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: lrr ~ causal_support
## Data: model_df (Number of observations: 306)
## Samples: 2 chains, each with iter = 3000; warmup = 500; thin = 1;
## total post-warmup samples = 5000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -0.62 0.10 -0.81 -0.43 1.00 5337 3011
## causal_support 0.08 0.01 0.06 0.11 1.00 6992 4198
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 1.61 0.06 1.49 1.74 1.00 4959 4021
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Trace and density plots for each parameter, then pairwise posterior
# scatterplots to check for problematic correlations.
plot(m1)
pairs(m1)
Posterior predictive
# Posterior predictive check for m1: same visualization as the prior
# predictive check, but predictions now come from the fitted model.
model_df %>%
select(causal_support, response_A) %>%
add_predicted_draws(m1, prediction = "lrr_rep", seed = 1234, n = 500) %>%
mutate(
# transform to probability units
response_A_rep = plogis(lrr_rep) * 100
) %>%
ggplot(aes(x = causal_support, y = response_A_rep)) +
stat_lineribbon(.width = c(0.95, 0.8, 0.5)) +
geom_point(data = model_df, aes(y = response_A), alpha = 0.5) +
scale_fill_brewer() +
labs(subtitle = "Posterior predictive distribution") +
theme(panel.grid = element_blank())
Prior predictive check
# Prior predictive check for the model adding data conditions: full
# interaction of causal_support, delta_p, and sample size n.
p2 <- brm(data = model_df, family = "gaussian",
lrr ~ causal_support*delta_p*n,
prior = c(prior(normal(-0.21, 1), class = Intercept), # center at qlogis(mean(model_df$response_A) / 100)
prior(normal(0, 1), class = b), # center predictor effects at 0
prior(normal(1, 2), class = b, coef = causal_support), # center at unbiased slope
prior(normal(0, 1), class = sigma)), # weakly informative half-normal
sample_prior = "only",
iter = 3000, warmup = 500, chains = 2, cores = 2)
## Compiling the C++ model
## Start sampling
# Prior predictive visualization for p2 over a synthetic grid of predictor
# values: quantiles of causal_support and delta_p spaced densely in the tails
# (via the logit-spaced probs), crossed with each level of n; faceted by n.
expand_grid(
causal_support = quantile(model_df$causal_support, probs = plogis(seq(from = qlogis(0.001), to = qlogis(0.999), length.out = 50))),
delta_p = quantile(model_df$delta_p, probs = plogis(seq(from = qlogis(0.001), to = qlogis(0.999), length.out = 20))),
n = unique(model_df$n)) %>%
add_predicted_draws(p2, prediction = "lrr_rep", seed = 1234, n = 500) %>%
mutate(
# transform to probability units
response_A_rep = plogis(lrr_rep) * 100
) %>%
ggplot(aes(x = causal_support, y = response_A_rep)) +
stat_lineribbon(.width = c(0.95, 0.8, 0.5)) +
geom_point(data = model_df, aes(y = response_A), alpha = 0.5) +
scale_fill_brewer() +
labs(subtitle = "Prior predictive distribution") +
theme(panel.grid = element_blank()) +
facet_grid(n ~ .)
Fit model
# Fit the data-conditions model (causal_support * delta_p * n), cached to
# disk via `file =`.
m2 <- brm(data = model_df, family = "gaussian",
lrr ~ causal_support*delta_p*n,
prior = c(prior(normal(-0.21, 1), class = Intercept), # center at qlogis(mean(model_df$response_A) / 100)
prior(normal(0, 1), class = b), # center predictor effects at 0
prior(normal(1, 2), class = b, coef = causal_support), # center at unbiased slope
prior(normal(0, 1), class = sigma)), # weakly informative half-normal
iter = 3000, warmup = 500, chains = 2, cores = 2,
file = "model-fits/2_data-conds")
Diagnostics
# Check convergence and effective sample sizes for m2.
summary(m2)
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: lrr ~ causal_support * delta_p * n
## Data: model_df (Number of observations: 306)
## Samples: 2 chains, each with iter = 3000; warmup = 500; thin = 1;
## total post-warmup samples = 5000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept -0.78 0.18 -1.13 -0.43 1.00 3655
## causal_support 0.40 0.17 0.06 0.72 1.00 1396
## delta_p 0.36 0.97 -1.57 2.19 1.00 5381
## n500 0.28 0.25 -0.22 0.76 1.00 3730
## n1000 0.21 0.26 -0.31 0.70 1.00 3771
## n5000 0.30 0.26 -0.22 0.82 1.00 4468
## causal_support:delta_p -0.93 0.68 -2.24 0.42 1.00 4779
## causal_support:n500 -0.12 0.19 -0.50 0.26 1.00 1633
## causal_support:n1000 0.00 0.18 -0.34 0.36 1.00 1511
## causal_support:n5000 -0.31 0.17 -0.64 0.03 1.00 1386
## delta_p:n500 0.01 0.97 -1.87 1.90 1.00 5595
## delta_p:n1000 0.06 0.99 -1.87 2.02 1.00 8708
## delta_p:n5000 0.10 1.02 -1.92 2.08 1.00 6118
## causal_support:delta_p:n500 -0.11 0.96 -1.95 1.77 1.00 5464
## causal_support:delta_p:n1000 -1.01 0.91 -2.80 0.77 1.00 5441
## causal_support:delta_p:n5000 0.39 0.75 -1.06 1.85 1.00 4459
## Tail_ESS
## Intercept 3613
## causal_support 2283
## delta_p 3262
## n500 3434
## n1000 3714
## n5000 3908
## causal_support:delta_p 3745
## causal_support:n500 2651
## causal_support:n1000 2493
## causal_support:n5000 2170
## delta_p:n500 3811
## delta_p:n1000 4044
## delta_p:n5000 3733
## causal_support:delta_p:n500 3211
## causal_support:delta_p:n1000 3985
## causal_support:delta_p:n5000 4070
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 1.55 0.06 1.44 1.68 1.00 6169 3856
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Trace/density plots, then pairs plots in thematic groups of parameters
# (too many coefficients to view all pairs at once).
plot(m2)
# intercepts
pairs(m2, pars = c("b_Intercept", "b_delta_p", "b_n500", "b_n1000", "b_n5000"))
# slopes on causal support
pairs(m2, pars = c("b_causal_support", "b_causal_support:delta_p", "b_causal_support:n500", "b_causal_support:n1000", "b_causal_support:n5000"))
# slopes on delta_p and sigma
pairs(m2, pars = c("b_delta_p", "b_delta_p:n500", "b_delta_p:n1000", "b_delta_p:n5000", "sigma"))
# slope interactions
pairs(m2, pars = c("b_causal_support:delta_p", "b_causal_support:delta_p:n500", "b_causal_support:delta_p:n1000", "b_causal_support:delta_p:n5000"))
Posterior predictive
# Posterior predictive check for m2 on the same synthetic predictor grid used
# for the prior predictive check, faceted by sample size n.
expand_grid(
causal_support = quantile(model_df$causal_support, probs = plogis(seq(from = qlogis(0.001), to = qlogis(0.999), length.out = 50))),
delta_p = quantile(model_df$delta_p, probs = plogis(seq(from = qlogis(0.001), to = qlogis(0.999), length.out = 20))),
n = unique(model_df$n)) %>%
add_predicted_draws(m2, prediction = "lrr_rep", seed = 1234, n = 500) %>%
mutate(
# transform to probability units
response_A_rep = plogis(lrr_rep) * 100
) %>%
ggplot(aes(x = causal_support, y = response_A_rep)) +
stat_lineribbon(.width = c(0.95, 0.8, 0.5)) +
geom_point(data = model_df, aes(y = response_A), alpha = 0.5) +
scale_fill_brewer() +
labs(subtitle = "Posterior predictive distribution") +
theme(panel.grid = element_blank()) +
facet_grid(n ~ .)
Prior predictive check
# Prior predictive check for the model adding visualization condition:
# full interaction causal_support * delta_p * n * vis.
p3 <- brm(data = model_df, family = "gaussian",
lrr ~ causal_support*delta_p*n*vis,
prior = c(prior(normal(-0.21, 1), class = Intercept), # center at qlogis(mean(model_df$response_A) / 100)
prior(normal(0, 1), class = b), # center predictor effects at 0
prior(normal(1, 2), class = b, coef = causal_support), # center at unbiased slope
prior(normal(0, 1), class = sigma)), # weakly informative half-normal
sample_prior = "only",
iter = 3000, warmup = 500, chains = 2, cores = 2)
## Compiling the C++ model
## Start sampling
# Prior predictive visualization for p3 on a grid crossing causal_support and
# delta_p quantiles with all levels of n and vis; faceted by n x vis.
expand_grid(
causal_support = quantile(model_df$causal_support, probs = plogis(seq(from = qlogis(0.001), to = qlogis(0.999), length.out = 50))),
delta_p = quantile(model_df$delta_p, probs = plogis(seq(from = qlogis(0.001), to = qlogis(0.999), length.out = 20))),
n = unique(model_df$n),
vis = unique(model_df$vis)) %>%
add_predicted_draws(p3, prediction = "lrr_rep", seed = 1234, n = 500) %>%
mutate(
# transform to probability units
response_A_rep = plogis(lrr_rep) * 100
) %>%
ggplot(aes(x = causal_support, y = response_A_rep)) +
stat_lineribbon(.width = c(0.95, 0.8, 0.5)) +
geom_point(data = model_df, aes(y = response_A), alpha = 0.5) +
scale_fill_brewer() +
labs(subtitle = "Prior predictive distribution") +
theme(panel.grid = element_blank()) +
facet_grid(n ~ vis)
Fit model
# Fit the visualization-conditions model (adds vis to the interaction),
# cached to disk via `file =`.
m3 <- brm(data = model_df, family = "gaussian",
lrr ~ causal_support*delta_p*n*vis,
prior = c(prior(normal(-0.21, 1), class = Intercept), # center at qlogis(mean(model_df$response_A) / 100)
prior(normal(0, 1), class = b), # center predictor effects at 0
prior(normal(1, 2), class = b, coef = causal_support), # center at unbiased slope
prior(normal(0, 1), class = sigma)), # weakly informative half-normal
iter = 3000, warmup = 500, chains = 2, cores = 2,
file = "model-fits/3_vis")
Diagnostics
# Check convergence and effective sample sizes for m3.
summary(m3)
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: lrr ~ causal_support * delta_p * n * vis
## Data: model_df (Number of observations: 306)
## Samples: 2 chains, each with iter = 3000; warmup = 500; thin = 1;
## total post-warmup samples = 5000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI
## Intercept -0.57 0.22 -1.00 -0.14
## causal_support 0.51 0.20 0.13 0.91
## delta_p 0.41 1.00 -1.57 2.39
## n500 0.07 0.31 -0.55 0.66
## n1000 -0.21 0.32 -0.85 0.40
## n5000 0.42 0.32 -0.19 1.04
## vistext_FALSE -0.46 0.29 -1.03 0.11
## causal_support:delta_p -1.03 0.73 -2.48 0.45
## causal_support:n500 -0.21 0.23 -0.67 0.24
## causal_support:n1000 -0.09 0.21 -0.52 0.32
## causal_support:n5000 -0.44 0.20 -0.84 -0.06
## delta_p:n500 0.01 0.99 -1.94 1.88
## delta_p:n1000 0.07 1.03 -1.96 2.02
## delta_p:n5000 0.10 1.00 -1.87 2.03
## causal_support:vistext_FALSE -0.28 0.27 -0.81 0.25
## delta_p:vistext_FALSE -0.09 0.98 -2.06 1.78
## n500:vistext_FALSE 0.48 0.41 -0.30 1.29
## n1000:vistext_FALSE 0.94 0.43 0.10 1.78
## n5000:vistext_FALSE -0.22 0.43 -1.07 0.62
## causal_support:delta_p:n500 -0.12 0.94 -1.96 1.70
## causal_support:delta_p:n1000 -1.01 0.90 -2.75 0.82
## causal_support:delta_p:n5000 0.20 0.78 -1.32 1.72
## causal_support:delta_p:vistext_FALSE -0.12 0.82 -1.74 1.47
## causal_support:n500:vistext_FALSE 0.25 0.31 -0.36 0.87
## causal_support:n1000:vistext_FALSE 0.28 0.29 -0.27 0.85
## causal_support:n5000:vistext_FALSE 0.35 0.27 -0.18 0.88
## delta_p:n500:vistext_FALSE -0.08 1.01 -2.06 1.84
## delta_p:n1000:vistext_FALSE -0.02 0.98 -1.91 1.90
## delta_p:n5000:vistext_FALSE 0.01 1.02 -1.98 2.00
## causal_support:delta_p:n500:vistext_FALSE 0.23 0.97 -1.64 2.14
## causal_support:delta_p:n1000:vistext_FALSE -0.23 0.97 -2.20 1.64
## causal_support:delta_p:n5000:vistext_FALSE -0.04 0.86 -1.72 1.69
## Rhat Bulk_ESS Tail_ESS
## Intercept 1.00 3519 4156
## causal_support 1.00 1402 2427
## delta_p 1.00 8551 3954
## n500 1.00 3897 3925
## n1000 1.00 3865 3775
## n5000 1.00 4271 4117
## vistext_FALSE 1.00 3103 3584
## causal_support:delta_p 1.00 5276 4038
## causal_support:n500 1.00 1735 2940
## causal_support:n1000 1.00 1559 2623
## causal_support:n5000 1.00 1412 2479
## delta_p:n500 1.00 6487 3547
## delta_p:n1000 1.00 7262 3631
## delta_p:n5000 1.00 6861 3882
## causal_support:vistext_FALSE 1.00 1429 2666
## delta_p:vistext_FALSE 1.00 7372 3700
## n500:vistext_FALSE 1.00 3458 4038
## n1000:vistext_FALSE 1.00 3913 3647
## n5000:vistext_FALSE 1.00 3957 3950
## causal_support:delta_p:n500 1.00 7322 3461
## causal_support:delta_p:n1000 1.00 6163 4096
## causal_support:delta_p:n5000 1.00 5192 4015
## causal_support:delta_p:vistext_FALSE 1.00 8452 4146
## causal_support:n500:vistext_FALSE 1.00 1814 2937
## causal_support:n1000:vistext_FALSE 1.00 1539 2759
## causal_support:n5000:vistext_FALSE 1.00 1447 2552
## delta_p:n500:vistext_FALSE 1.00 7424 3996
## delta_p:n1000:vistext_FALSE 1.00 7829 3667
## delta_p:n5000:vistext_FALSE 1.00 7774 3986
## causal_support:delta_p:n500:vistext_FALSE 1.00 7063 3514
## causal_support:delta_p:n1000:vistext_FALSE 1.00 8150 3422
## causal_support:delta_p:n5000:vistext_FALSE 1.00 6815 4045
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 1.54 0.07 1.41 1.67 1.00 7060 3786
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Trace and density plots for m3 parameters.
plot(m3)
Too many pairs to plot, so we’ll only do it as needed to check issues from here on out.
Posterior predictive
# Posterior predictive check for m3 on a (coarser, 20-point) predictor grid,
# faceted by n x vis.
expand_grid(
causal_support = quantile(model_df$causal_support, probs = plogis(seq(from = qlogis(0.001), to = qlogis(0.999), length.out = 20))),
delta_p = quantile(model_df$delta_p, probs = plogis(seq(from = qlogis(0.001), to = qlogis(0.999), length.out = 20))),
n = unique(model_df$n),
vis = unique(model_df$vis)) %>%
add_predicted_draws(m3, prediction = "lrr_rep", seed = 1234, n = 500) %>%
mutate(
# transform to probability units
response_A_rep = plogis(lrr_rep) * 100
) %>%
ggplot(aes(x = causal_support, y = response_A_rep)) +
stat_lineribbon(.width = c(0.95, 0.8, 0.5)) +
geom_point(data = model_df, aes(y = response_A), alpha = 0.5) +
scale_fill_brewer() +
labs(subtitle = "Posterior predictive distribution") +
theme(panel.grid = element_blank()) +
facet_grid(n ~ vis)
Prior predictive check
# Prior predictive check for the heteroskedastic model: sigma is modeled on
# the log link as a function of |causal_support| via a distributional
# formula (bf with a sigma sub-model).
p4 <- brm(data = model_df, family = "gaussian",
formula = bf(lrr ~ causal_support*delta_p*n*vis,
sigma ~ abs(causal_support)),
prior = c(prior(normal(-0.21, 1), class = Intercept), # center at qlogis(mean(model_df$response_A) / 100)
prior(normal(0, 1), class = b), # center predictor effects at 0
prior(normal(1, 2), class = b, coef = causal_support), # center at unbiased slope
prior(normal(0, 1), class = b, dpar = sigma), # weakly informative half-normal
prior(normal(0, 1), class = Intercept, dpar = sigma)), # weakly informative half-normal
sample_prior = "only",
iter = 3000, warmup = 500, chains = 2, cores = 2)
## Compiling the C++ model
## Start sampling
# Prior predictive visualization for p4, faceted by n x vis.
expand_grid(
causal_support = quantile(model_df$causal_support, probs = plogis(seq(from = qlogis(0.001), to = qlogis(0.999), length.out = 50))),
delta_p = quantile(model_df$delta_p, probs = plogis(seq(from = qlogis(0.001), to = qlogis(0.999), length.out = 20))),
n = unique(model_df$n),
vis = unique(model_df$vis)) %>%
add_predicted_draws(p4, prediction = "lrr_rep", seed = 1234, n = 500) %>%
mutate(
# transform to probability units
response_A_rep = plogis(lrr_rep) * 100
) %>%
ggplot(aes(x = causal_support, y = response_A_rep)) +
stat_lineribbon(.width = c(0.95, 0.8, 0.5)) +
geom_point(data = model_df, aes(y = response_A), alpha = 0.5) +
scale_fill_brewer() +
labs(subtitle = "Prior predictive distribution") +
theme(panel.grid = element_blank()) +
facet_grid(n ~ vis)
Fit model
# Fit the heteroskedastic model (sigma sub-model on |causal_support|),
# cached to disk via `file =`.
m4 <- brm(data = model_df, family = "gaussian",
formula = bf(lrr ~ causal_support*delta_p*n*vis,
sigma ~ abs(causal_support)),
prior = c(prior(normal(-0.21, 1), class = Intercept), # center at qlogis(mean(model_df$response_A) / 100)
prior(normal(0, 1), class = b), # center predictor effects at 0
prior(normal(1, 2), class = b, coef = causal_support), # center at unbiased slope
prior(normal(0, 1), class = b, dpar = sigma), # weakly informative half-normal
prior(normal(0, 1), class = Intercept, dpar = sigma)), # weakly informative half-normal
iter = 3000, warmup = 500, chains = 2, cores = 2,
file = "model-fits/4_var")
Diagnostics
# Check convergence and effective sample sizes for m4.
summary(m4)
## Family: gaussian
## Links: mu = identity; sigma = log
## Formula: lrr ~ causal_support * delta_p * n * vis
## sigma ~ abs(causal_support)
## Data: model_df (Number of observations: 306)
## Samples: 2 chains, each with iter = 3000; warmup = 500; thin = 1;
## total post-warmup samples = 5000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI
## Intercept -0.58 0.23 -1.03 -0.13
## sigma_Intercept 0.46 0.05 0.37 0.56
## causal_support 0.52 0.20 0.12 0.92
## delta_p 0.37 0.95 -1.50 2.24
## n500 0.07 0.32 -0.54 0.70
## n1000 -0.21 0.33 -0.86 0.45
## n5000 0.47 0.32 -0.16 1.12
## vistext_FALSE -0.45 0.31 -1.06 0.16
## causal_support:delta_p -1.06 0.72 -2.45 0.36
## causal_support:n500 -0.22 0.24 -0.70 0.25
## causal_support:n1000 -0.10 0.22 -0.51 0.33
## causal_support:n5000 -0.46 0.20 -0.86 -0.05
## delta_p:n500 -0.00 1.01 -2.00 1.97
## delta_p:n1000 0.04 1.01 -1.91 2.00
## delta_p:n5000 0.09 0.98 -1.84 2.01
## causal_support:vistext_FALSE -0.29 0.28 -0.82 0.25
## delta_p:vistext_FALSE -0.09 0.99 -2.09 1.82
## n500:vistext_FALSE 0.48 0.43 -0.36 1.33
## n1000:vistext_FALSE 0.91 0.44 0.05 1.79
## n5000:vistext_FALSE -0.32 0.45 -1.21 0.57
## causal_support:delta_p:n500 -0.13 0.96 -2.00 1.76
## causal_support:delta_p:n1000 -0.99 0.91 -2.73 0.78
## causal_support:delta_p:n5000 0.17 0.75 -1.31 1.66
## causal_support:delta_p:vistext_FALSE -0.14 0.84 -1.81 1.49
## causal_support:n500:vistext_FALSE 0.26 0.32 -0.36 0.90
## causal_support:n1000:vistext_FALSE 0.29 0.29 -0.27 0.86
## causal_support:n5000:vistext_FALSE 0.36 0.28 -0.17 0.89
## delta_p:n500:vistext_FALSE -0.04 0.99 -2.02 1.91
## delta_p:n1000:vistext_FALSE -0.01 1.02 -2.03 2.04
## delta_p:n5000:vistext_FALSE 0.04 1.02 -1.95 2.01
## causal_support:delta_p:n500:vistext_FALSE 0.23 0.99 -1.74 2.17
## causal_support:delta_p:n1000:vistext_FALSE -0.20 0.99 -2.10 1.75
## causal_support:delta_p:n5000:vistext_FALSE -0.04 0.86 -1.72 1.65
## sigma_abscausal_support -0.01 0.01 -0.02 0.00
## Rhat Bulk_ESS Tail_ESS
## Intercept 1.00 2882 3040
## sigma_Intercept 1.00 5472 3979
## causal_support 1.00 1215 1850
## delta_p 1.00 5249 3460
## n500 1.00 3159 3451
## n1000 1.00 2672 3183
## n5000 1.00 3049 3326
## vistext_FALSE 1.00 2433 3057
## causal_support:delta_p 1.00 4032 3966
## causal_support:n500 1.00 1526 2239
## causal_support:n1000 1.00 1303 2108
## causal_support:n5000 1.00 1215 1803
## delta_p:n500 1.00 6844 3544
## delta_p:n1000 1.00 5725 3444
## delta_p:n5000 1.00 5867 3867
## causal_support:vistext_FALSE 1.00 1415 2465
## delta_p:vistext_FALSE 1.00 4855 3689
## n500:vistext_FALSE 1.00 2857 3367
## n1000:vistext_FALSE 1.00 2543 3497
## n5000:vistext_FALSE 1.00 3013 3366
## causal_support:delta_p:n500 1.00 7162 3377
## causal_support:delta_p:n1000 1.00 4008 3544
## causal_support:delta_p:n5000 1.00 4383 3936
## causal_support:delta_p:vistext_FALSE 1.00 4328 3516
## causal_support:n500:vistext_FALSE 1.00 1693 2761
## causal_support:n1000:vistext_FALSE 1.00 1538 2503
## causal_support:n5000:vistext_FALSE 1.00 1439 2440
## delta_p:n500:vistext_FALSE 1.00 5331 3691
## delta_p:n1000:vistext_FALSE 1.00 6560 3663
## delta_p:n5000:vistext_FALSE 1.00 7370 3901
## causal_support:delta_p:n500:vistext_FALSE 1.00 5940 3678
## causal_support:delta_p:n1000:vistext_FALSE 1.00 5921 3474
## causal_support:delta_p:n5000:vistext_FALSE 1.00 5550 3216
## sigma_abscausal_support 1.00 8307 3729
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Trace and density plots for m4 parameters.
plot(m4)
Posterior predictive
# Posterior predictive check for m4, faceted by n x vis.
expand_grid(
causal_support = quantile(model_df$causal_support, probs = plogis(seq(from = qlogis(0.001), to = qlogis(0.999), length.out = 20))),
delta_p = quantile(model_df$delta_p, probs = plogis(seq(from = qlogis(0.001), to = qlogis(0.999), length.out = 20))),
n = unique(model_df$n),
vis = unique(model_df$vis)) %>%
add_predicted_draws(m4, prediction = "lrr_rep", seed = 1234, n = 500) %>%
mutate(
# transform to probability units
response_A_rep = plogis(lrr_rep) * 100
) %>%
ggplot(aes(x = causal_support, y = response_A_rep)) +
stat_lineribbon(.width = c(0.95, 0.8, 0.5)) +
geom_point(data = model_df, aes(y = response_A), alpha = 0.5) +
scale_fill_brewer() +
labs(subtitle = "Posterior predictive distribution") +
theme(panel.grid = element_blank()) +
facet_grid(n ~ vis)
Prior predictive check. Now that we’ve added hierarchy we’ll narrow our priors to get a bit of regularization.
# Prior predictive check for the hierarchical model: random intercepts and
# random slopes on causal_support per worker, plus a random intercept in the
# sigma sub-model. Priors are narrowed relative to earlier models for
# regularization now that hierarchy is added.
p5 <- brm(data = model_df, family = "gaussian",
formula = bf(lrr ~ causal_support*delta_p*n*vis + (causal_support|workerId),
sigma ~ abs(causal_support) + (1|workerId)),# + (abs(causal_support)|workerId)),
prior = c(prior(normal(-0.21, 1), class = Intercept), # center at qlogis(mean(model_df$response_A) / 100)
prior(normal(0, 0.5), class = b), # center predictor effects at 0
prior(normal(1, 0.5), class = b, coef = causal_support), # center at unbiased slope
prior(normal(0, 0.2), class = b, dpar = sigma), # weakly informative half-normal
prior(normal(0, 0.2), class = Intercept, dpar = sigma), # weakly informative half-normal
prior(normal(0, 0.2), class = sd), # weakly informative half-normal
prior(lkj(4), class = cor)), # avoiding large correlations
sample_prior = "only",
iter = 3000, warmup = 500, chains = 2, cores = 2)
## Compiling the C++ model
## Start sampling
## Warning: There were 2 divergent transitions after warmup. Increasing adapt_delta above 0.8 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## Warning: Examine the pairs() plot to diagnose sampling problems
# Prior predictive visualization for p5: build a predictor grid within each
# (n, vis, workerId) group via data_grid (presumably modelr — verify that
# the package is attached upstream), then facet by worker so per-worker
# random effects are visible. Only the 95% ribbon is drawn to reduce clutter.
model_df %>%
group_by(n, vis, workerId) %>%
data_grid(
causal_support = quantile(model_df$causal_support, probs = plogis(seq(from = qlogis(0.001), to = qlogis(0.999), length.out = 20))),
delta_p = quantile(model_df$delta_p, probs = plogis(seq(from = qlogis(0.001), to = qlogis(0.999), length.out = 10)))) %>%
add_predicted_draws(p5, prediction = "lrr_rep", seed = 1234, n = 500) %>%
mutate(
# transform to probability units
response_A_rep = plogis(lrr_rep) * 100
) %>%
ggplot(aes(x = causal_support, y = response_A_rep, fill = vis)) +
stat_lineribbon(.width = c(0.95)) +
geom_point(data = model_df, aes(y = response_A), alpha = 0.5) +
scale_fill_brewer(type = "qual", palette = 2) +
scale_color_brewer(type = "qual", palette = 2) +
labs(subtitle = "Prior predictive distribution") +
theme(panel.grid = element_blank()) +
facet_wrap(workerId ~ .)
Fit model. Random slope effects on sigma did not work out.
# Fit the hierarchical model. Note the tightened sampler settings
# (adapt_delta = 0.99, max_treedepth = 12) to address divergences seen in the
# prior predictive run; per the inline note, random slopes on sigma were
# dropped because they did not work out. Cached to disk via `file =`.
m5 <- brm(data = model_df, family = "gaussian",
formula = bf(lrr ~ causal_support*delta_p*n*vis + (causal_support|workerId),
sigma ~ abs(causal_support) + (1|workerId)),# + (abs(causal_support)|workerId)),
prior = c(prior(normal(-0.21, 1), class = Intercept), # center at qlogis(mean(model_df$response_A) / 100)
prior(normal(0, 0.5), class = b), # center predictor effects at 0
prior(normal(1, 0.5), class = b, coef = causal_support), # center at unbiased slope
prior(normal(0, 0.2), class = b, dpar = sigma), # weakly informative half-normal
prior(normal(0, 0.2), class = Intercept, dpar = sigma), # weakly informative half-normal
prior(normal(0, 0.2), class = sd), # weakly informative half-normal
prior(lkj(4), class = cor)), # avoiding large correlations
iter = 3000, warmup = 500, chains = 2, cores = 2,
control = list(adapt_delta = 0.99, max_treedepth = 12),
file = "model-fits/5b_re-simple")
Diagnostics
# Check convergence and effective sample sizes for m5, including
# group-level (random effect) standard deviations and correlations.
summary(m5)
## Family: gaussian
## Links: mu = identity; sigma = log
## Formula: lrr ~ causal_support * delta_p * n * vis + (causal_support | workerId)
## sigma ~ abs(causal_support) + (1 | workerId)
## Data: model_df (Number of observations: 306)
## Samples: 2 chains, each with iter = 3000; warmup = 500; thin = 1;
## total post-warmup samples = 5000
##
## Group-Level Effects:
## ~workerId (Number of levels: 18)
## Estimate Est.Error l-95% CI u-95% CI Rhat
## sd(Intercept) 0.63 0.09 0.46 0.80 1.00
## sd(causal_support) 0.07 0.02 0.04 0.12 1.00
## sd(sigma_Intercept) 0.46 0.10 0.29 0.70 1.00
## cor(Intercept,causal_support) -0.43 0.17 -0.73 -0.05 1.00
## Bulk_ESS Tail_ESS
## sd(Intercept) 2836 3542
## sd(causal_support) 1896 3050
## sd(sigma_Intercept) 1516 2534
## cor(Intercept,causal_support) 2518 3213
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI
## Intercept -0.58 0.24 -1.07 -0.11
## sigma_Intercept 0.17 0.11 -0.05 0.38
## causal_support 0.34 0.14 0.07 0.63
## delta_p 0.17 0.50 -0.79 1.15
## n500 -0.04 0.18 -0.40 0.31
## n1000 0.10 0.18 -0.28 0.45
## n5000 0.28 0.19 -0.09 0.65
## vistext_FALSE -0.19 0.31 -0.77 0.42
## causal_support:delta_p -0.60 0.39 -1.35 0.17
## causal_support:n500 -0.18 0.15 -0.48 0.12
## causal_support:n1000 -0.14 0.15 -0.44 0.14
## causal_support:n5000 -0.27 0.14 -0.55 0.00
## delta_p:n500 0.09 0.50 -0.87 1.06
## delta_p:n1000 0.01 0.50 -0.94 0.94
## delta_p:n5000 0.04 0.51 -0.94 1.02
## causal_support:vistext_FALSE 0.01 0.21 -0.41 0.40
## delta_p:vistext_FALSE 0.01 0.51 -0.98 1.05
## n500:vistext_FALSE 0.20 0.29 -0.37 0.76
## n1000:vistext_FALSE 0.27 0.29 -0.29 0.84
## n5000:vistext_FALSE -0.06 0.29 -0.63 0.52
## causal_support:delta_p:n500 -0.05 0.47 -0.93 0.89
## causal_support:delta_p:n1000 -0.40 0.47 -1.30 0.51
## causal_support:delta_p:n5000 0.06 0.42 -0.77 0.85
## causal_support:delta_p:vistext_FALSE -0.20 0.46 -1.09 0.71
## causal_support:n500:vistext_FALSE 0.18 0.22 -0.26 0.63
## causal_support:n1000:vistext_FALSE 0.19 0.21 -0.22 0.60
## causal_support:n5000:vistext_FALSE 0.03 0.20 -0.37 0.43
## delta_p:n500:vistext_FALSE 0.01 0.50 -0.97 1.01
## delta_p:n1000:vistext_FALSE -0.00 0.49 -0.97 0.96
## delta_p:n5000:vistext_FALSE 0.01 0.51 -0.98 1.01
## causal_support:delta_p:n500:vistext_FALSE 0.06 0.50 -0.93 1.04
## causal_support:delta_p:n1000:vistext_FALSE -0.10 0.49 -1.07 0.88
## causal_support:delta_p:n5000:vistext_FALSE -0.06 0.47 -0.99 0.84
## sigma_abscausal_support -0.02 0.01 -0.04 0.00
## Rhat Bulk_ESS Tail_ESS
## Intercept 1.00 1771 2867
## sigma_Intercept 1.00 1516 2455
## causal_support 1.00 1479 2454
## delta_p 1.00 6951 3567
## n500 1.00 4332 3753
## n1000 1.00 4318 4220
## n5000 1.00 4300 3605
## vistext_FALSE 1.00 2240 3185
## causal_support:delta_p 1.00 5376 3934
## causal_support:n500 1.00 1671 2895
## causal_support:n1000 1.00 1605 2601
## causal_support:n5000 1.00 1513 2610
## delta_p:n500 1.00 6733 3471
## delta_p:n1000 1.00 7645 3937
## delta_p:n5000 1.00 8993 3933
## causal_support:vistext_FALSE 1.00 1633 2351
## delta_p:vistext_FALSE 1.00 8695 3346
## n500:vistext_FALSE 1.00 4482 4013
## n1000:vistext_FALSE 1.00 4447 3853
## n5000:vistext_FALSE 1.00 3904 3656
## causal_support:delta_p:n500 1.00 6415 3719
## causal_support:delta_p:n1000 1.00 8123 3104
## causal_support:delta_p:n5000 1.00 5846 4157
## causal_support:delta_p:vistext_FALSE 1.00 6713 3904
## causal_support:n500:vistext_FALSE 1.00 1875 2680
## causal_support:n1000:vistext_FALSE 1.00 1799 2443
## causal_support:n5000:vistext_FALSE 1.00 1672 2152
## delta_p:n500:vistext_FALSE 1.00 9810 3611
## delta_p:n1000:vistext_FALSE 1.00 7501 3428
## delta_p:n5000:vistext_FALSE 1.00 9422 3182
## causal_support:delta_p:n500:vistext_FALSE 1.00 9237 3478
## causal_support:delta_p:n1000:vistext_FALSE 1.00 9300 3438
## causal_support:delta_p:n5000:vistext_FALSE 1.00 5302 3693
## sigma_abscausal_support 1.00 3609 3557
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Trace and density plots for m5 parameters.
plot(m5)
Posterior predictive
# Posterior predictive check for m5, faceted by worker to assess how well
# per-worker random effects capture individual response patterns.
model_df %>%
group_by(n, vis, workerId) %>%
data_grid(
causal_support = quantile(model_df$causal_support, probs = plogis(seq(from = qlogis(0.001), to = qlogis(0.999), length.out = 20))),
delta_p = quantile(model_df$delta_p, probs = plogis(seq(from = qlogis(0.001), to = qlogis(0.999), length.out = 10)))) %>%
add_predicted_draws(m5, prediction = "lrr_rep", seed = 1234, n = 500) %>%
mutate(
# transform to probability units
response_A_rep = plogis(lrr_rep) * 100
) %>%
ggplot(aes(x = causal_support, y = response_A_rep, fill = vis)) +
stat_lineribbon(.width = c(0.95)) +
geom_point(data = model_df, aes(y = response_A), alpha = 0.5) +
scale_fill_brewer(type = "qual", palette = 2) +
scale_color_brewer(type = "qual", palette = 2) +
labs(subtitle = "Posterior predictive distribution") +
theme(panel.grid = element_blank()) +
facet_wrap(workerId ~ .)
Same priors as before.
Fit model. Full interaction of random slopes did not work.
# Fit a richer hierarchical model: per-worker random slopes on the
# causal_support:delta_p and causal_support:n interactions (per the inline
# note, the full interaction of random slopes did not converge/fit).
# Same priors and sampler settings as m5; cached via `file =`.
m6 <- brm(data = model_df, family = "gaussian",
formula = bf(lrr ~ causal_support*delta_p*n*vis + (causal_support:delta_p + causal_support:n|workerId), #+ (causal_support*delta_p*n|workerId),
sigma ~ abs(causal_support) + (1|workerId)),
prior = c(prior(normal(-0.21, 1), class = Intercept), # center at qlogis(mean(model_df$response_A) / 100)
prior(normal(0, 0.5), class = b), # center predictor effects at 0
prior(normal(1, 0.5), class = b, coef = causal_support), # center at unbiased slope
prior(normal(0, 0.2), class = b, dpar = sigma), # weakly informative half-normal
prior(normal(0, 0.2), class = Intercept, dpar = sigma), # weakly informative half-normal
prior(normal(0, 0.2), class = sd), # weakly informative half-normal
prior(lkj(4), class = cor)), # avoiding large correlations
iter = 3000, warmup = 500, chains = 2, cores = 2,
control = list(adapt_delta = 0.99, max_treedepth = 12),
file = "model-fits/6_re-data_conds")
Diagnostics
# Check convergence and effective sample sizes for m6.
summary(m6)
## Family: gaussian
## Links: mu = identity; sigma = log
## Formula: lrr ~ causal_support * delta_p * n * vis + (causal_support:delta_p + causal_support:n | workerId)
## sigma ~ abs(causal_support) + (1 | workerId)
## Data: model_df (Number of observations: 306)
## Samples: 2 chains, each with iter = 3000; warmup = 500; thin = 1;
## total post-warmup samples = 5000
##
## Group-Level Effects:
## ~workerId (Number of levels: 18)
## Estimate Est.Error l-95% CI
## sd(Intercept) 0.60 0.09 0.45
## sd(causal_support:delta_p) 0.20 0.15 0.01
## sd(causal_support:n500) 0.14 0.09 0.01
## sd(causal_support:n1000) 0.21 0.07 0.08
## sd(causal_support:n5000) 0.05 0.02 0.02
## sd(sigma_Intercept) 0.46 0.11 0.29
## cor(Intercept,causal_support:delta_p) -0.03 0.28 -0.56
## cor(Intercept,causal_support:n500) -0.10 0.25 -0.57
## cor(causal_support:delta_p,causal_support:n500) 0.02 0.29 -0.54
## cor(Intercept,causal_support:n1000) -0.30 0.20 -0.66
## cor(causal_support:delta_p,causal_support:n1000) 0.06 0.29 -0.51
## cor(causal_support:n500,causal_support:n1000) 0.19 0.28 -0.38
## cor(Intercept,causal_support:n5000) -0.35 0.19 -0.69
## cor(causal_support:delta_p,causal_support:n5000) 0.02 0.28 -0.53
## cor(causal_support:n500,causal_support:n5000) 0.17 0.27 -0.41
## cor(causal_support:n1000,causal_support:n5000) 0.41 0.23 -0.09
## u-95% CI Rhat Bulk_ESS
## sd(Intercept) 0.79 1.00 3127
## sd(causal_support:delta_p) 0.55 1.00 2070
## sd(causal_support:n500) 0.35 1.00 1328
## sd(causal_support:n1000) 0.36 1.00 1449
## sd(causal_support:n5000) 0.09 1.00 1451
## sd(sigma_Intercept) 0.73 1.00 1526
## cor(Intercept,causal_support:delta_p) 0.53 1.00 5190
## cor(Intercept,causal_support:n500) 0.41 1.00 4120
## cor(causal_support:delta_p,causal_support:n500) 0.56 1.00 3374
## cor(Intercept,causal_support:n1000) 0.12 1.00 3351
## cor(causal_support:delta_p,causal_support:n1000) 0.60 1.00 1857
## cor(causal_support:n500,causal_support:n1000) 0.67 1.00 2152
## cor(Intercept,causal_support:n5000) 0.06 1.00 3074
## cor(causal_support:delta_p,causal_support:n5000) 0.55 1.00 2116
## cor(causal_support:n500,causal_support:n5000) 0.65 1.00 2237
## cor(causal_support:n1000,causal_support:n5000) 0.78 1.00 2971
## Tail_ESS
## sd(Intercept) 2846
## sd(causal_support:delta_p) 2277
## sd(causal_support:n500) 1866
## sd(causal_support:n1000) 1034
## sd(causal_support:n5000) 875
## sd(sigma_Intercept) 2825
## cor(Intercept,causal_support:delta_p) 3431
## cor(Intercept,causal_support:n500) 3457
## cor(causal_support:delta_p,causal_support:n500) 3607
## cor(Intercept,causal_support:n1000) 3368
## cor(causal_support:delta_p,causal_support:n1000) 3036
## cor(causal_support:n500,causal_support:n1000) 3136
## cor(Intercept,causal_support:n5000) 3097
## cor(causal_support:delta_p,causal_support:n5000) 3266
## cor(causal_support:n500,causal_support:n5000) 2970
## cor(causal_support:n1000,causal_support:n5000) 3411
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI
## Intercept -0.56 0.23 -1.01 -0.10
## sigma_Intercept 0.16 0.11 -0.07 0.37
## causal_support 0.35 0.14 0.08 0.62
## delta_p 0.15 0.51 -0.82 1.11
## n500 -0.05 0.18 -0.40 0.31
## n1000 0.12 0.18 -0.24 0.47
## n5000 0.27 0.18 -0.09 0.63
## vistext_FALSE -0.22 0.30 -0.80 0.36
## causal_support:delta_p -0.53 0.41 -1.34 0.27
## causal_support:n500 -0.18 0.16 -0.49 0.15
## causal_support:n1000 -0.06 0.16 -0.37 0.25
## causal_support:n5000 -0.28 0.14 -0.55 -0.01
## delta_p:n500 0.08 0.49 -0.88 1.04
## delta_p:n1000 0.01 0.49 -0.92 0.96
## delta_p:n5000 0.04 0.50 -0.93 1.01
## causal_support:vistext_FALSE -0.00 0.20 -0.39 0.40
## delta_p:vistext_FALSE 0.00 0.50 -0.94 0.99
## n500:vistext_FALSE 0.22 0.29 -0.34 0.77
## n1000:vistext_FALSE 0.29 0.28 -0.26 0.83
## n5000:vistext_FALSE -0.08 0.28 -0.66 0.47
## causal_support:delta_p:n500 -0.07 0.49 -1.00 0.91
## causal_support:delta_p:n1000 -0.25 0.49 -1.21 0.74
## causal_support:delta_p:n5000 0.04 0.41 -0.79 0.85
## causal_support:delta_p:vistext_FALSE -0.14 0.46 -1.03 0.78
## causal_support:n500:vistext_FALSE 0.16 0.23 -0.32 0.61
## causal_support:n1000:vistext_FALSE 0.10 0.23 -0.34 0.54
## causal_support:n5000:vistext_FALSE 0.05 0.21 -0.37 0.44
## delta_p:n500:vistext_FALSE 0.01 0.50 -0.96 0.98
## delta_p:n1000:vistext_FALSE -0.00 0.50 -0.99 0.99
## delta_p:n5000:vistext_FALSE 0.02 0.50 -0.95 1.00
## causal_support:delta_p:n500:vistext_FALSE 0.06 0.50 -0.93 1.04
## causal_support:delta_p:n1000:vistext_FALSE -0.06 0.50 -1.03 0.94
## causal_support:delta_p:n5000:vistext_FALSE -0.06 0.47 -0.97 0.84
## sigma_abscausal_support -0.02 0.01 -0.03 0.00
## Rhat Bulk_ESS Tail_ESS
## Intercept 1.00 1860 2484
## sigma_Intercept 1.00 1591 2126
## causal_support 1.00 1813 2806
## delta_p 1.00 7278 3189
## n500 1.00 4894 3974
## n1000 1.00 4335 4269
## n5000 1.00 3952 3368
## vistext_FALSE 1.00 2550 3360
## causal_support:delta_p 1.00 4834 3546
## causal_support:n500 1.00 2155 3159
## causal_support:n1000 1.00 2056 3182
## causal_support:n5000 1.00 1784 2631
## delta_p:n500 1.00 8715 3410
## delta_p:n1000 1.00 6750 3327
## delta_p:n5000 1.00 6067 3670
## causal_support:vistext_FALSE 1.00 1634 2196
## delta_p:vistext_FALSE 1.00 6644 3673
## n500:vistext_FALSE 1.00 3878 3940
## n1000:vistext_FALSE 1.00 3937 3412
## n5000:vistext_FALSE 1.00 4246 3907
## causal_support:delta_p:n500 1.00 6397 3544
## causal_support:delta_p:n1000 1.00 5542 3711
## causal_support:delta_p:n5000 1.00 4917 4021
## causal_support:delta_p:vistext_FALSE 1.00 6762 3793
## causal_support:n500:vistext_FALSE 1.00 2054 2876
## causal_support:n1000:vistext_FALSE 1.00 1841 2638
## causal_support:n5000:vistext_FALSE 1.00 1657 2350
## delta_p:n500:vistext_FALSE 1.00 5692 3882
## delta_p:n1000:vistext_FALSE 1.00 8084 3613
## delta_p:n5000:vistext_FALSE 1.00 7685 3420
## causal_support:delta_p:n500:vistext_FALSE 1.00 6769 3631
## causal_support:delta_p:n1000:vistext_FALSE 1.00 6332 3928
## causal_support:delta_p:n5000:vistext_FALSE 1.00 5713 3886
## sigma_abscausal_support 1.00 3062 3742
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Visual MCMC diagnostics for m6 (parameter densities and trace plots).
plot(m6)
Posterior predictive
# Posterior predictive check for m6: simulate replicated responses over a grid
# of data conditions and overlay them on the observed responses, per worker.
model_df %>%
  group_by(n, vis, workerId) %>%
  # Grid the continuous predictors at logistic-spaced quantiles of their
  # observed distributions, so grid points concentrate near the extremes.
  data_grid(
    causal_support = quantile(model_df$causal_support, probs = plogis(seq(from = qlogis(0.001), to = qlogis(0.999), length.out = 20))),
    delta_p = quantile(model_df$delta_p, probs = plogis(seq(from = qlogis(0.001), to = qlogis(0.999), length.out = 10)))
  ) %>%
  add_predicted_draws(m6, prediction = "lrr_rep", seed = 1234, n = 500) %>%
  # Map simulated log response ratios back onto the 0-100 response scale.
  mutate(response_A_rep = plogis(lrr_rep) * 100) %>%
  ggplot(aes(x = causal_support, y = response_A_rep, fill = vis)) +
  stat_lineribbon(.width = c(0.95)) +
  geom_point(data = model_df, aes(y = response_A), alpha = 0.5) +
  scale_fill_brewer(type = "qual", palette = 2) +
  scale_color_brewer(type = "qual", palette = 2) +
  labs(subtitle = "Posterior predictive distribution") +
  theme(panel.grid = element_blank()) +
  facet_wrap(workerId ~ .)
# Compare all candidate models on approximate leave-one-out predictive fit.
loo(m1, m2, m3, m4, m5, m6)
## Warning: Found 11 observations with a pareto_k > 0.7 in model 'm5'. With this
## many problematic observations, it may be more appropriate to use 'kfold' with
## argument 'K = 10' to perform 10-fold cross-validation rather than LOO.
## Warning: Found 15 observations with a pareto_k > 0.7 in model 'm6'. With this
## many problematic observations, it may be more appropriate to use 'kfold' with
## argument 'K = 10' to perform 10-fold cross-validation rather than LOO.
## Output of model 'm1':
##
## Computed from 5000 by 306 log-likelihood matrix
##
## Estimate SE
## elpd_loo -581.9 13.6
## p_loo 3.2 0.5
## looic 1163.7 27.2
## ------
## Monte Carlo SE of elpd_loo is 0.0.
##
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
##
## Output of model 'm2':
##
## Computed from 5000 by 306 log-likelihood matrix
##
## Estimate SE
## elpd_loo -575.3 14.1
## p_loo 12.1 2.0
## looic 1150.5 28.3
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 304 99.3% 250
## (0.5, 0.7] (ok) 2 0.7% 491
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## Output of model 'm3':
##
## Computed from 5000 by 306 log-likelihood matrix
##
## Estimate SE
## elpd_loo -576.3 13.5
## p_loo 19.1 2.6
## looic 1152.6 27.0
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 303 99.0% 602
## (0.5, 0.7] (ok) 3 1.0% 214
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## Output of model 'm4':
##
## Computed from 5000 by 306 log-likelihood matrix
##
## Estimate SE
## elpd_loo -576.9 13.5
## p_loo 21.2 3.1
## looic 1153.8 27.1
## ------
## Monte Carlo SE of elpd_loo is 0.1.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 304 99.3% 490
## (0.5, 0.7] (ok) 2 0.7% 196
## (0.7, 1] (bad) 0 0.0% <NA>
## (1, Inf) (very bad) 0 0.0% <NA>
##
## All Pareto k estimates are ok (k < 0.7).
## See help('pareto-k-diagnostic') for details.
##
## Output of model 'm5':
##
## Computed from 5000 by 306 log-likelihood matrix
##
## Estimate SE
## elpd_loo -511.3 16.1
## p_loo 57.5 5.5
## looic 1022.7 32.2
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 270 88.2% 1042
## (0.5, 0.7] (ok) 25 8.2% 125
## (0.7, 1] (bad) 11 3.6% 65
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## Output of model 'm6':
##
## Computed from 5000 by 306 log-likelihood matrix
##
## Estimate SE
## elpd_loo -514.5 15.8
## p_loo 63.9 5.7
## looic 1029.0 31.6
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 258 84.3% 501
## (0.5, 0.7] (ok) 33 10.8% 207
## (0.7, 1] (bad) 14 4.6% 56
## (1, Inf) (very bad) 1 0.3% 49
## See help('pareto-k-diagnostic') for details.
##
## Model comparisons:
## elpd_diff se_diff
## m5 0.0 0.0
## m6 -3.1 2.7
## m2 -63.9 9.8
## m3 -64.9 9.8
## m4 -65.6 10.0
## m1 -70.5 11.1
Derive linear-in-log-odds slopes: the expected change in lrr per unit change in causal_support, computed by contrasting fitted values at causal_support = 0 vs 1.
# Fitted lrr at causal_support = 0 vs 1 within each data condition; the
# pairwise difference (via compare_levels) is the linear-in-log-odds slope.
# Uses m5, the best model per the loo comparison above.
slopes_df <- model_df %>%
  group_by(n, vis, workerId) %>%
  data_grid(
    causal_support = c(0, 1),
    # logistic-spaced quantiles of observed delta_p
    delta_p = quantile(model_df$delta_p, probs = plogis(seq(qlogis(0.001), qlogis(0.999), length.out = 20)))
  ) %>%
  add_fitted_draws(m5, value = "lrr_rep", seed = 1234, n = 500) %>%
  compare_levels(lrr_rep, by = causal_support) %>%
  rename(slope = lrr_rep)
Effect of sample size
# Marginal effect of sample size n on slopes.
slopes_df %>%
  # keep n; average over delta_p, vis, and workers within each posterior draw
  group_by(n, .draw) %>%
  summarise(slope = weighted.mean(slope)) %>%
  ggplot(aes(x = slope, y = "")) +
  stat_halfeyeh() +
  facet_grid(n ~ .) +
  theme_bw()
## `summarise()` regrouping output by 'n' (override with `.groups` argument)
Interaction of visualization and sample size
# Interaction of visualization condition and sample size.
slopes_df %>%
  # keep n and vis; average over delta_p and workers within each posterior draw
  group_by(n, vis, .draw) %>%
  summarise(slope = weighted.mean(slope)) %>%
  ggplot(aes(x = slope, y = vis)) +
  stat_halfeyeh() +
  facet_grid(n ~ .) +
  theme_bw()
## `summarise()` regrouping output by 'n', 'vis' (override with `.groups` argument)
Pattern of slopes across levels of delta_p
# Pattern of slopes across levels of delta_p, one line per posterior draw.
slopes_df %>%
  # keep delta_p; average over n, vis, and workers within each posterior draw
  group_by(delta_p, .draw) %>%
  summarise(slope = weighted.mean(slope)) %>%
  ggplot(aes(x = slope, y = delta_p, group = .draw)) +
  geom_line(alpha = 0.1) +
  theme_bw()
## `summarise()` regrouping output by 'delta_p' (override with `.groups` argument)
Interaction of visualization and delta_p
slopes_df %>%
group_by(delta_p, vis, .draw) %>% # group by predictors to keep (delta_p and vis are retained)
summarise(slope = weighted.mean(slope)) %>% # marginalize out n and worker effects
ggplot(aes(x = slope, y = delta_p, group = .draw)) +
geom_line(alpha = 0.1) +
theme_bw() +
facet_grid(. ~ vis)
## `summarise()` regrouping output by 'delta_p', 'vis' (override with `.groups` argument)
Effect of visualization
slopes_df %>%
group_by(vis, .draw) %>% # group by predictors to keep (vis is retained)
summarise(slope = weighted.mean(slope)) %>% # marginalize out delta_p, n, and worker effects
ggplot(aes(x = slope, y = vis)) +
stat_halfeyeh() +
theme_bw()
## `summarise()` regrouping output by 'vis' (override with `.groups` argument)